In [ ]:
# Purpose of this notebook:
    # Plot each feature's values across the days of study and the MSFC composite scores on the same graph.
    # Plot the regression line that best fits the feature values, together with the line joining the MSFC composite scores of visits 2 & 3.
In [105]:
# necessary imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math as math
from scipy.stats import pearsonr, betai
%matplotlib inline
In [86]:
# switch to the proper directory to access the data
pwd
Out[86]:
'/camdatalake/bronze/verily_ms/device'
In [3]:
cd /camdatalake/bronze/verily_ms/device/
/camdatalake/bronze/verily_ms/device
In [87]:
pwd
Out[87]:
'/camdatalake/bronze/verily_ms/device'
In [88]:
# check the content of the directory
ls
FeaturesDay.csv.gz                 FeatureStudy_free_living_related
FeaturesStudy.csv.gz               FeatureStudy_model_related
FeatureStudy_at_home_related       FeatureStudy_MSFC_composite_related
FeatureStudy_clinical_1_related    FeatureStudy_MSFC_related
FeatureStudy_clinical_2_related    FeatureStudy_patient_info_related
FeatureStudy_clinical_3_related    GMSSMDEVICEANDCLINICALDATASUMMARY.pdf
FeatureStudy_clinical_related      GMSSMDEVICEANDCLINICALDATASUMMARY.txt
FeatureStudy_demographics_related  GMSSMEXPECTEDMISSINGDATA.pdf
In [89]:
# download and read the data

# FeatureDay: Average value of the features for each day of study. Days are listed as 
# DayOfStudy

# FeatureStudy: Features for the entire study period. For the at-home features,
# the reported value is the median of the observed day-level values.

import gzip, csv
# Day-level table: decompress in text mode and let pandas parse the header row.
with gzip.open("FeaturesDay.csv.gz", mode="rt", newline="") as day_level_csv:
    FeatureDay = pd.read_csv(day_level_csv, header=0)

# Study-level table: same format, read the same way.
with gzip.open("FeaturesStudy.csv.gz", mode="rt", newline="") as study_level_csv:
    FeatureStudy = pd.read_csv(study_level_csv, header=0)
In [90]:
# Explore the day-level dataset: print its index range, column count,
# dtype breakdown and memory usage.
FeatureDay.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1532 entries, 0 to 1531
Columns: 217 entries, user_email to demographic_model_error_3
dtypes: float64(212), int64(3), object(2)
memory usage: 2.5+ MB
In [91]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
# NOTE(review): in the recorded output the quartile rows are NaN for columns whose
# count is below the row count, alongside a numpy percentile RuntimeWarning --
# presumably caused by missing values; confirm before relying on those rows.
FeatureDay.describe()
/opt/python3/lib/python3.5/site-packages/numpy/lib/function_base.py:3823: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[91]:
msfc_walk_composite_1 msfc_9hpt_composite_1 msfc_sdmt_composite_1 msfc_snellen_composite_1 msfc_composite_1 msfc_walk_composite_residual_1 msfc_9hpt_composite_residual_1 msfc_sdmt_composite_residual_1 msfc_snellen_composite_residual_1 msfc_walk_composite_2 ... hourly_temp_median_at_home hourly_temp_std_at_home hourly_temp_range_at_home hourly_amb_temp_median_at_home hourly_amb_temp_std_at_home hourly_amb_temp_range_at_home prv_sdnn demographic_model_error_1 demographic_model_error_2 demographic_model_error_3
count 1532.000000 1532.000000 1532.000000 1532.000000 1532.000000 1532.000000 1532.000000 1532.000000 1532.000000 1530.000000 ... 1285.000000 1285.000000 1.285000e+03 1285.000000 1285.000000 1285.000000 1044.000000 1532.000000 1530.000000 1304.000000
mean -0.019921 0.090237 0.144536 0.039580 0.063608 -0.080672 0.030814 0.128497 -0.036285 0.039871 ... 29.650973 0.726677 1.766848e+00 25.396385 3.906939 9.583735 41.326326 0.071111 -0.000918 0.025754
std 1.003682 0.953135 0.862097 1.015171 0.763705 0.820543 0.894932 0.785066 1.075377 0.776117 ... 2.929483 0.786577 1.732030e+00 2.653647 1.401338 3.322437 11.870807 0.724094 0.740245 0.799465
min -3.221528 -1.886624 -2.159982 -2.556372 -1.373281 -3.038226 -1.545838 -1.801496 -2.351975 -2.248677 ... 16.700000 0.000000 -3.552714e-15 16.185000 0.000000 0.000000 11.727158 -1.577726 -1.461933 -2.505707
25% -0.256647 -0.645806 -0.411009 -0.416552 -0.398860 -0.201410 -0.680438 -0.321350 -0.802429 NaN ... NaN NaN NaN NaN NaN NaN NaN -0.426405 NaN NaN
50% 0.299268 0.104155 0.135545 0.296722 0.345561 0.038386 0.180416 0.138712 -0.257036 NaN ... NaN NaN NaN NaN NaN NaN NaN 0.277935 NaN NaN
75% 0.669878 0.940655 0.900721 1.295305 0.642663 0.301486 0.790244 0.656998 0.807599 NaN ... NaN NaN NaN NaN NaN NaN NaN 0.741979 NaN NaN
max 0.924672 1.897517 1.939174 1.295305 1.137124 1.463065 1.543403 2.247295 1.817092 0.924672 ... 33.600000 6.860394 1.470500e+01 32.740000 7.682660 18.409500 114.271105 1.117692 0.987118 1.151321

8 rows × 215 columns

In [92]:
# Preview the first five rows of the day-level table.
# NOTE(review): the rendered preview includes the user_email and gls_subject_code
# columns -- clear or redact this output before sharing the notebook.
FeatureDay.head()
Out[92]:
user_email gls_subject_code msfc_walk_composite_1 msfc_9hpt_composite_1 msfc_sdmt_composite_1 msfc_snellen_composite_1 msfc_composite_1 msfc_walk_composite_residual_1 msfc_9hpt_composite_residual_1 msfc_sdmt_composite_residual_1 ... hourly_temp_median_at_home hourly_temp_std_at_home hourly_temp_range_at_home hourly_amb_temp_median_at_home hourly_amb_temp_std_at_home hourly_amb_temp_range_at_home prv_sdnn demographic_model_error_1 demographic_model_error_2 demographic_model_error_3
0 gmssm001@glsstudykit.com H800001 -0.187158 -1.886624 -1.285495 -1.486462 -1.211435 1.035492 -1.545838 0.138712 ... 32.70 0.760989 1.905 30.7575 4.330252 11.7550 26.824533 -0.924262 -1.03338 -1.525646
1 gmssm001@glsstudykit.com H800001 -0.187158 -1.886624 -1.285495 -1.486462 -1.211435 1.035492 -1.545838 0.138712 ... 32.85 0.532629 1.350 29.5875 6.815948 15.8120 24.795007 -0.924262 -1.03338 -1.525646
2 gmssm001@glsstudykit.com H800001 -0.187158 -1.886624 -1.285495 -1.486462 -1.211435 1.035492 -1.545838 0.138712 ... 31.60 0.717705 1.800 30.2200 3.545528 7.3300 28.387662 -0.924262 -1.03338 -1.525646
3 gmssm001@glsstudykit.com H800001 -0.187158 -1.886624 -1.285495 -1.486462 -1.211435 1.035492 -1.545838 0.138712 ... 30.75 1.689474 5.550 27.3600 4.984148 13.5820 31.598062 -0.924262 -1.03338 -1.525646
4 gmssm001@glsstudykit.com H800001 -0.187158 -1.886624 -1.285495 -1.486462 -1.211435 1.035492 -1.545838 0.138712 ... 30.65 0.830880 2.205 30.6825 4.699070 13.8555 23.523694 -0.924262 -1.03338 -1.525646

5 rows × 217 columns

In [93]:
# List every column name of the day-level table (identifiers, MSFC scores,
# demographics, dayofstudy and the device-derived features) as a plain Python list.
list(FeatureDay.columns)
Out[93]:
['user_email',
 'gls_subject_code',
 'msfc_walk_composite_1',
 'msfc_9hpt_composite_1',
 'msfc_sdmt_composite_1',
 'msfc_snellen_composite_1',
 'msfc_composite_1',
 'msfc_walk_composite_residual_1',
 'msfc_9hpt_composite_residual_1',
 'msfc_sdmt_composite_residual_1',
 'msfc_snellen_composite_residual_1',
 'msfc_walk_composite_2',
 'msfc_9hpt_composite_2',
 'msfc_sdmt_composite_2',
 'msfc_snellen_composite_2',
 'msfc_composite_2',
 'msfc_walk_composite_residual_2',
 'msfc_9hpt_composite_residual_2',
 'msfc_sdmt_composite_residual_2',
 'msfc_snellen_composite_residual_2',
 'msfc_walk_composite_3',
 'msfc_9hpt_composite_3',
 'msfc_sdmt_composite_3',
 'msfc_snellen_composite_3',
 'msfc_composite_3',
 'msfc_walk_composite_residual_3',
 'msfc_9hpt_composite_residual_3',
 'msfc_sdmt_composite_residual_3',
 'msfc_snellen_composite_residual_3',
 'sex',
 'height_inch',
 'weight_lbs',
 'bmi',
 'age_years',
 'dayofstudy',
 'movement_rate',
 'duration_movement_count',
 'stance',
 'swing',
 'turn_angle',
 'turn_duration',
 'turn_vel_max',
 'turn_vel_mean',
 'turn_vel_std',
 'turn_angle_ankle',
 'turn_duration_ankle',
 'turn_vel_max_ankle',
 'turn_vel_mean_ankle',
 'turn_vel_std_ankle',
 'mean_pvt_delay_1_clinic_1',
 'mean_pvt_delay_3_clinic_1',
 'mean_pvt_delay_5_clinic_1',
 'mean_pvt_delay_7_clinic_1',
 'mean_pvt_delay_clinic_1',
 'mean_pvt_delay_1_clinic_2',
 'mean_pvt_delay_3_clinic_2',
 'mean_pvt_delay_5_clinic_2',
 'mean_pvt_delay_7_clinic_2',
 'mean_pvt_delay_clinic_2',
 'fatigue_level_clinic_2',
 'mean_pvt_delay_1_at_home',
 'mean_pvt_delay_3_at_home',
 'mean_pvt_delay_5_at_home',
 'mean_pvt_delay_7_at_home',
 'mean_pvt_delay_at_home',
 'fatigue_level_at_home',
 'mean_pvt_delay_1_clinic_3',
 'mean_pvt_delay_3_clinic_3',
 'mean_pvt_delay_5_clinic_3',
 'mean_pvt_delay_7_clinic_3',
 'mean_pvt_delay_clinic_3',
 'fatigue_level_clinic_3',
 'duration_rem_count',
 'rem_epochs',
 'rem_percent',
 'walk_minutes',
 'idle_minutes',
 'pq_nondominant_median_clinic_1',
 'pq_dominant_median_clinic_1',
 'zx_nondominant_median_clinic_1',
 'zx_dominant_median_clinic_1',
 'zx_nondominant_num_correct_clinic_1',
 'zx_dominant_num_correct_clinic_1',
 'zx_nondominant_rhythm_clinic_1',
 'zx_dominant_rhythm_clinic_1',
 'pq_nondominant_rhythm_clinic_1',
 'pq_dominant_rhythm_clinic_1',
 'pq_nondominant_median_clinic_2',
 'pq_dominant_median_clinic_2',
 'zx_nondominant_median_clinic_2',
 'zx_dominant_median_clinic_2',
 'zx_nondominant_num_correct_clinic_2',
 'zx_dominant_num_correct_clinic_2',
 'zx_nondominant_rhythm_clinic_2',
 'zx_dominant_rhythm_clinic_2',
 'pq_nondominant_rhythm_clinic_2',
 'pq_dominant_rhythm_clinic_2',
 'pq_nondominant_median_clinic_3',
 'pq_dominant_median_clinic_3',
 'zx_nondominant_median_clinic_3',
 'zx_dominant_median_clinic_3',
 'zx_nondominant_num_correct_clinic_3',
 'zx_dominant_num_correct_clinic_3',
 'zx_nondominant_rhythm_clinic_3',
 'zx_dominant_rhythm_clinic_3',
 'pq_nondominant_rhythm_clinic_3',
 'pq_dominant_rhythm_clinic_3',
 'pq_nondominant_median_at_home',
 'pq_dominant_median_at_home',
 'zx_nondominant_median_at_home',
 'zx_dominant_median_at_home',
 'zx_nondominant_num_correct_at_home',
 'zx_dominant_num_correct_at_home',
 'zx_nondominant_rhythm_at_home',
 'zx_dominant_rhythm_at_home',
 'pq_nondominant_rhythm_at_home',
 'pq_dominant_rhythm_at_home',
 'mobility_stance_clinic_1',
 'mobility_swing_clinic_1',
 'turn_angle_clinic_1',
 'turn_duration_clinic_1',
 'turn_vel_max_clinic_1',
 'turn_vel_mean_clinic_1',
 'turn_vel_std_clinic_1',
 'turn_angle_ankle_clinic_1',
 'turn_duration_ankle_clinic_1',
 'turn_vel_max_ankle_clinic_1',
 'turn_vel_mean_ankle_clinic_1',
 'turn_vel_std_ankle_clinic_1',
 'sway_dist_lr_clinic_1',
 'sway_dist_ap_clinic_1',
 'sway_disp_lr_clinic_1',
 'sway_disp_ap_clinic_1',
 'mobility_activity_clinic_1_time',
 'mobility_stance_clinic_2',
 'mobility_swing_clinic_2',
 'turn_angle_clinic_2',
 'turn_duration_clinic_2',
 'turn_vel_max_clinic_2',
 'turn_vel_mean_clinic_2',
 'turn_vel_std_clinic_2',
 'turn_angle_ankle_clinic_2',
 'turn_duration_ankle_clinic_2',
 'turn_vel_max_ankle_clinic_2',
 'turn_vel_mean_ankle_clinic_2',
 'turn_vel_std_ankle_clinic_2',
 'sway_dist_lr_clinic_2',
 'sway_dist_ap_clinic_2',
 'sway_disp_lr_clinic_2',
 'sway_disp_ap_clinic_2',
 'mobility_activity_clinic_2_time',
 'mobility_stance_at_home',
 'mobility_swing_at_home',
 'turn_angle_at_home',
 'turn_duration_at_home',
 'turn_vel_max_at_home',
 'turn_vel_mean_at_home',
 'turn_vel_std_at_home',
 'turn_angle_ankle_at_home',
 'turn_duration_ankle_at_home',
 'turn_vel_max_ankle_at_home',
 'turn_vel_mean_ankle_at_home',
 'turn_vel_std_ankle_at_home',
 'sway_dist_lr_at_home',
 'sway_dist_ap_at_home',
 'sway_disp_lr_at_home',
 'sway_disp_ap_at_home',
 'mobility_activity_at_home_time',
 'mobility_stance_clinic_3',
 'mobility_swing_clinic_3',
 'turn_angle_clinic_3',
 'turn_duration_clinic_3',
 'turn_vel_max_clinic_3',
 'turn_vel_mean_clinic_3',
 'turn_vel_std_clinic_3',
 'turn_angle_ankle_clinic_3',
 'turn_duration_ankle_clinic_3',
 'turn_vel_max_ankle_clinic_3',
 'turn_vel_mean_ankle_clinic_3',
 'turn_vel_std_ankle_clinic_3',
 'sway_dist_lr_clinic_3',
 'sway_dist_ap_clinic_3',
 'sway_disp_lr_clinic_3',
 'sway_disp_ap_clinic_3',
 'mobility_activity_clinic_3_time',
 'hrv_sdnn_clinic_1',
 'hrv_sdnn_clinic_2',
 'hrv_sdnn_at_home',
 'hrv_sdnn_clinic_3',
 'hourly_temp_median_clinic_1',
 'hourly_temp_std_clinic_1',
 'hourly_temp_range_clinic_1',
 'hourly_amb_temp_median_clinic_1',
 'hourly_amb_temp_std_clinic_1',
 'hourly_amb_temp_range_clinic_1',
 'hourly_temp_median_clinic_2',
 'hourly_temp_std_clinic_2',
 'hourly_temp_range_clinic_2',
 'hourly_amb_temp_median_clinic_2',
 'hourly_amb_temp_std_clinic_2',
 'hourly_amb_temp_range_clinic_2',
 'hourly_temp_median_clinic_3',
 'hourly_temp_std_clinic_3',
 'hourly_temp_range_clinic_3',
 'hourly_amb_temp_median_clinic_3',
 'hourly_amb_temp_std_clinic_3',
 'hourly_amb_temp_range_clinic_3',
 'hourly_temp_median_at_home',
 'hourly_temp_std_at_home',
 'hourly_temp_range_at_home',
 'hourly_amb_temp_median_at_home',
 'hourly_amb_temp_std_at_home',
 'hourly_amb_temp_range_at_home',
 'prv_sdnn',
 'demographic_model_error_1',
 'demographic_model_error_2',
 'demographic_model_error_3']
In [94]:
# Collect the unique patient IDs, in order of first appearance in FeatureDay
patient_IDs = FeatureDay['gls_subject_code'].unique().tolist()
patient_IDs
Out[94]:
['H800001',
 'H800002',
 'H800003',
 'H800004',
 'H800006',
 'H800007',
 'H800008',
 'H800009',
 'H800010',
 'H800011',
 'H800013',
 'H800014',
 'H800016',
 'H800017',
 'H800018',
 'H800019',
 'H800020',
 'H800021',
 'H800022',
 'H800023',
 'H800024',
 'H800025',
 'H800005',
 'H800012',
 'H800015']
In [95]:
# The 10 free-living features with high correlation

free_living_features_highly_correlated = [
    'idle_minutes',
    'turn_vel_std_ankle',
    'swing',
    'stance',
    'duration_movement_count',
    'turn_vel_max_ankle',
    'turn_duration_ankle',
    'duration_rem_count',
    'rem_percent',
    'movement_rate',
]
In [96]:
# The 19 highly correlated at-home features (structured activity)

at_home_features_highly_correlated = [
    'mean_pvt_delay_7_at_home',
    'mobility_stance_at_home',
    'mean_pvt_delay_at_home',
    'pq_nondominant_rhythm_at_home',
    'pq_nondominant_median_at_home',
    'pq_dominant_rhythm_at_home',
    'turn_vel_max_at_home',
    'mobility_swing_at_home',
    'zx_dominant_num_correct_at_home',
    'turn_vel_std_at_home',
    'turn_duration_ankle_at_home',
    'turn_vel_max_ankle_at_home',
    'mean_pvt_delay_5_at_home',
    'zx_nondominant_median_at_home',
    'zx_nondominant_num_correct_at_home',
    'mean_pvt_delay_3_at_home',
    'turn_vel_std_ankle_at_home',
    'mobility_activity_at_home_time',
    'mean_pvt_delay_1_at_home',
]
In [97]:
# MSFC-related columns (all components including composite scores).
# For each suffix 1, 2, 3 the order is: the four component composites
# (walk, 9hpt, sdmt, snellen), the overall composite, then the four
# component residuals.

FeatureStudy_columns_MSFC_col_names = [
    column
    for visit in (1, 2, 3)
    for column in (
        ['msfc_{}_composite_{}'.format(component, visit)
         for component in ('walk', '9hpt', 'sdmt', 'snellen')]
        + ['msfc_composite_{}'.format(visit)]
        + ['msfc_{}_composite_residual_{}'.format(component, visit)
           for component in ('walk', '9hpt', 'sdmt', 'snellen')]
    )
]
In [98]:
# MSFC-related columns (overall composite scores only), suffixes 1 to 3
FeatureStudy_columns_MSFC_composite_col_names = [
    'msfc_composite_{}'.format(visit) for visit in (1, 2, 3)
]
In [99]:
# Split FeatureDay into column subsets: free-living features, at-home features,
# all MSFC score columns, and the MSFC composite columns only.
FeatureDay_free_living = FeatureDay.loc[:, free_living_features_highly_correlated]
FeatureDay_at_home = FeatureDay.loc[:, at_home_features_highly_correlated]
FeaFeatureDay_MSFC_score_all = FeatureDay.loc[:, FeatureStudy_columns_MSFC_col_names]
FeaFeatureDay_MSFC_score_composite = FeatureDay.loc[:, FeatureStudy_columns_MSFC_composite_col_names]
In [110]:
# Create a data frame with the patients' IDs, msfc_composite_2 and msfc_composite_3.
# One row per patient: the first FeatureDay row of each gls_subject_code is kept,
# in order of first appearance (the same order that .unique() yields for patient_IDs).
# A single drop_duplicates pass replaces the per-patient re-filtering of the whole
# frame and avoids leaving loop temporaries (df, ID, idx, ...) in the notebook namespace.
df_MSFC_composite = (
    FeatureDay
    .drop_duplicates(subset='gls_subject_code')
    .loc[:, ['gls_subject_code', 'msfc_composite_2', 'msfc_composite_3']]
    .rename(columns={
        'gls_subject_code': 'patient_ID',
        'msfc_composite_2': 'visit2_MSFC_composite',
        'msfc_composite_3': 'visit3_MSFC_composite',
    })
    .reset_index(drop=True)
)
df_MSFC_composite.head()
Out[110]:
patient_ID visit2_MSFC_composite visit3_MSFC_composite
0 H800001 -1.385647 -1.407630
1 H800002 1.109154 0.631568
2 H800003 0.932657 0.911542
3 H800004 -0.983236 -0.695738
4 H800006 0.568276 0.495086
In [107]:
def remove_outliers(feature_values, day_of_study, m=2):
    """Drop observations lying more than ``m`` standard deviations from the mean.

    Values and days are paired by position; a pair is kept only when the value
    lies inside ``[mean - m * std, mean + m * std]``.

    Parameters
    ----------
    feature_values : pd.Series (or array-like)
        Feature measurements.
    day_of_study : pd.Series (or array-like)
        Day of study for each measurement, in the same positional order.
    m : float, optional
        Allowed distance from the mean, in standard deviations (default 2).

    Returns
    -------
    (pd.Series, pd.Series)
        The retained feature values and their days of study. Both Series always
        have the same length and a fresh 0..k-1 index; both are empty when no
        observation is retained.

    Notes
    -----
    NaN values never satisfy the bounds, so they are always dropped. When the
    standard deviation is undefined (fewer than two non-NaN values) the bounds
    are NaN and nothing is retained.
    """
    values = pd.Series(np.asarray(feature_values))
    days = pd.Series(np.asarray(day_of_study))

    # Statistics are taken over all supplied values (pandas skips NaN).
    mean = values.mean()
    std = values.std()
    lower_bound = mean - m * std
    upper_bound = mean + m * std

    # Pair positionally and stop at the shorter input, like zip() would.
    n_pairs = min(len(values), len(days))
    paired_values = values.iloc[:n_pairs]
    paired_days = days.iloc[:n_pairs]

    keep = ((paired_values >= lower_bound) & (paired_values <= upper_bound)).values

    # Always return two Series of equal length, even when nothing is kept.
    return (paired_values[keep].reset_index(drop=True),
            paired_days[keep].reset_index(drop=True))
In [108]:
def standardize_axis(feature):
    """Compute common axis maxima for ``feature`` across all patients.

    For every ID in the module-level ``patient_IDs`` the patient's rows are
    taken from the module-level ``FeatureDay`` frame, ordered by day of study,
    and passed through ``remove_outliers``. The maxima are then taken over the
    pooled values returned for all patients.

    Parameters
    ----------
    feature : str
        Name of a column of ``FeatureDay``.

    Returns
    -------
    (max_y, max_x)
        Largest retained feature value and largest day of study returned by
        ``remove_outliers`` over all patients.

    Raises
    ------
    ValueError
        If no feature value (or no day of study) is retained for any patient,
        so no maximum exists.
    """
    day_column = 'dayofstudy'
    all_filtered_feature_values = []
    all_filtered_days_of_study = []

    for ID in patient_IDs:
        # rows of this patient, restricted to the feature and the day of study
        df = FeatureDay.loc[FeatureDay['gls_subject_code'] == ID, [feature, day_column]]
        # sort_values returns a new frame: DataFrame.sort is deprecated (and removed
        # in later pandas), and sorting a filtered slice in place is best avoided
        df = df.sort_values(day_column)
        # remove outliers (both feature values & associated days of study)
        y, x = remove_outliers(df[feature], df[day_column])
        all_filtered_feature_values.extend(y.tolist())
        all_filtered_days_of_study.extend(x.tolist())

    if not all_filtered_feature_values or not all_filtered_days_of_study:
        # np.max on an empty list raises an opaque ValueError; keep the exception
        # type but say which feature is affected
        raise ValueError(
            "no values left for feature {!r} after outlier removal".format(feature))

    max_y = np.max(all_filtered_feature_values)
    max_x = np.max(all_filtered_days_of_study)
    return max_y, max_x
In [113]:
def plot_feature_across_days_and_composite_scores(feature):
    """Plot one feature over the days of study for every patient, with MSFC overlay.

    One subplot is drawn per ID in the module-level ``patient_IDs``:

    * the patient's outlier-filtered values of ``feature`` against the day of
      study, with a fitted regression line (``sns.regplot``, label 'device');
    * a straight line from x=0 to x=max_x joining the patient's visit 2 and
      visit 3 MSFC composite scores (from ``df_MSFC_composite``), each scaled
      by ``max_y / 2`` so it is visible on the feature's axis (label 'MSFC').

    All subplots share the same limits: x in [0, max_x] and y in
    [-max_y, max_y], where (max_y, max_x) come from ``standardize_axis``.

    Parameters
    ----------
    feature : str
        Name of a column of the module-level ``FeatureDay`` frame.

    Returns
    -------
    None
        The figure is created as a side effect; the feature name is printed.
    """
    day_column = 'dayofstudy'
    n_cols = 5
    n_patients = len(patient_IDs)
    # 5 x 5 grid as long as it is large enough; add rows instead of failing
    # with an IndexError when there are more than 25 patients
    n_rows = max(5, -(-n_patients // n_cols))
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(20, 4 * n_rows), dpi=200, squeeze=False)
    print(feature)
    if n_patients == 0:
        return

    # The axis ranges depend only on the feature, so compute them once
    # rather than once per patient.
    max_y, max_x = standardize_axis(feature)

    for idx, ID in enumerate(patient_IDs):
        ax = axes[idx // n_cols, idx % n_cols]

        # rows of this patient, ordered by day of study
        df = FeatureDay.loc[FeatureDay['gls_subject_code'] == ID, [feature, day_column]]
        df = df.sort_values(day_column)

        # Fix the limits before plotting: the limits stay in force for the
        # artists added afterwards.
        ax.set_xlim(0, max_x)
        # MSFC scores can be negative, so the y axis is symmetric around zero
        ax.set_ylim(-1 * max_y, max_y)
        ax.set_title(ID, y=0.9)

        # plot the measurements vs. days of study; an all-missing feature
        # yields an empty result here and is simply skipped
        y, x = remove_outliers(df[feature], df[day_column])
        if len(y) > 0:
            # keyword x/y: newer seaborn versions reject positional data vectors
            sns.regplot(x=x, y=y, ax=ax, label='device')
        ax.set_xlabel('Days of Study')
        ax.set_ylabel(feature)

        # plot MSFC scores for visits 2 & 3 on the same graph
        msfc_scores = df_MSFC_composite.loc[
            df_MSFC_composite['patient_ID'] == ID,
            ['visit2_MSFC_composite', 'visit3_MSFC_composite']].iloc[0]
        # MSFC scores are very small compared to the feature values. Scale them.
        scaled_scores = [(max_y / 2) * score for score in msfc_scores]
        ax.plot([0, max_x], scaled_scores, label='MSFC')
        ax.legend(loc=3)
In [ ]:
# plot free living features over days of study
# plot the regression line that can fit to feature values
# plot a regression line for MSFC composite scores for visits 2 and 3
In [114]:
# free-living feature 0: 'idle_minutes'
feature = free_living_features_highly_correlated[0]
plot_feature_across_days_and_composite_scores(feature)
idle_minutes
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [115]:
# free-living feature 1: 'turn_vel_std_ankle'
feature = free_living_features_highly_correlated[1]
plot_feature_across_days_and_composite_scores(feature)
turn_vel_std_ankle
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [116]:
# free-living feature 2: 'swing'
feature = free_living_features_highly_correlated[2]
plot_feature_across_days_and_composite_scores(feature)
swing
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [117]:
# free-living feature 3: 'stance'
feature = free_living_features_highly_correlated[3]
plot_feature_across_days_and_composite_scores(feature)
stance
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [118]:
# free-living feature 4: 'duration_movement_count'
feature = free_living_features_highly_correlated[4]
plot_feature_across_days_and_composite_scores(feature)
duration_movement_count
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [119]:
# free-living feature 5: 'turn_vel_max_ankle'
feature = free_living_features_highly_correlated[5]
plot_feature_across_days_and_composite_scores(feature)
turn_vel_max_ankle
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [120]:
# free-living feature 6: 'turn_duration_ankle'
feature = free_living_features_highly_correlated[6]
plot_feature_across_days_and_composite_scores(feature)
turn_duration_ankle
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [121]:
# free-living feature 7: 'duration_rem_count'
feature = free_living_features_highly_correlated[7]
plot_feature_across_days_and_composite_scores(feature)
duration_rem_count
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [122]:
# free-living feature 8: 'rem_percent'
feature = free_living_features_highly_correlated[8]
plot_feature_across_days_and_composite_scores(feature)
rem_percent
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [123]:
# free-living feature 9: 'movement_rate'
feature = free_living_features_highly_correlated[9]
plot_feature_across_days_and_composite_scores(feature)
movement_rate
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [ ]:
# plot at home (structured activity) features over days of study
# plot the regression line that can fit to feature values
# plot a regression line for MSFC composite scores for visits 2 and 3
In [125]:
# at-home feature 0: 'mean_pvt_delay_7_at_home'
feature = at_home_features_highly_correlated[0]
plot_feature_across_days_and_composite_scores(feature)
mean_pvt_delay_7_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [126]:
# at-home feature 1: 'mobility_stance_at_home'
feature = at_home_features_highly_correlated[1]
plot_feature_across_days_and_composite_scores(feature)
mobility_stance_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [127]:
# at-home feature 2: 'mean_pvt_delay_at_home'
feature = at_home_features_highly_correlated[2]
plot_feature_across_days_and_composite_scores(feature)
mean_pvt_delay_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [128]:
# at-home feature 3: 'pq_nondominant_rhythm_at_home'
feature = at_home_features_highly_correlated[3]
plot_feature_across_days_and_composite_scores(feature)
pq_nondominant_rhythm_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [129]:
# at-home feature 4: 'pq_nondominant_median_at_home'
feature = at_home_features_highly_correlated[4]
plot_feature_across_days_and_composite_scores(feature)
pq_nondominant_median_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [130]:
# at-home feature 5: 'pq_dominant_rhythm_at_home'
feature = at_home_features_highly_correlated[5]
plot_feature_across_days_and_composite_scores(feature)
pq_dominant_rhythm_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [131]:
# at-home feature 6: 'turn_vel_max_at_home'
feature = at_home_features_highly_correlated[6]
plot_feature_across_days_and_composite_scores(feature)
turn_vel_max_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [132]:
# at-home feature 7: 'mobility_swing_at_home'
feature = at_home_features_highly_correlated[7]
plot_feature_across_days_and_composite_scores(feature)
mobility_swing_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [133]:
# at-home feature 8: 'zx_dominant_num_correct_at_home'
feature = at_home_features_highly_correlated[8]
plot_feature_across_days_and_composite_scores(feature)
zx_dominant_num_correct_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [134]:
# at-home feature 9: 'turn_vel_std_at_home'
feature = at_home_features_highly_correlated[9]
plot_feature_across_days_and_composite_scores(feature)
turn_vel_std_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [135]:
# at-home feature 10: 'turn_duration_ankle_at_home'
feature = at_home_features_highly_correlated[10]
plot_feature_across_days_and_composite_scores(feature)
turn_duration_ankle_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [136]:
# at-home feature 11: 'turn_vel_max_ankle_at_home'
feature = at_home_features_highly_correlated[11]
plot_feature_across_days_and_composite_scores(feature)
turn_vel_max_ankle_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [137]:
# at-home feature 12: 'mean_pvt_delay_5_at_home'
feature = at_home_features_highly_correlated[12]
plot_feature_across_days_and_composite_scores(feature)
mean_pvt_delay_5_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [138]:
# at-home feature 13: 'zx_nondominant_median_at_home'
feature = at_home_features_highly_correlated[13]
plot_feature_across_days_and_composite_scores(feature)
zx_nondominant_median_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [139]:
# at-home feature 14: 'zx_nondominant_num_correct_at_home'
feature = at_home_features_highly_correlated[14]
plot_feature_across_days_and_composite_scores(feature)
zx_nondominant_num_correct_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [140]:
# at-home feature 15: 'mean_pvt_delay_3_at_home'
feature = at_home_features_highly_correlated[15]
plot_feature_across_days_and_composite_scores(feature)
mean_pvt_delay_3_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [141]:
# at-home feature 16: 'turn_vel_std_ankle_at_home'
feature = at_home_features_highly_correlated[16]
plot_feature_across_days_and_composite_scores(feature)
turn_vel_std_ankle_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [142]:
# at-home feature 17: 'mobility_activity_at_home_time'
feature = at_home_features_highly_correlated[17]
plot_feature_across_days_and_composite_scores(feature)
mobility_activity_at_home_time
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [143]:
# at-home feature 18: 'mean_pvt_delay_1_at_home'
feature = at_home_features_highly_correlated[18]
plot_feature_across_days_and_composite_scores(feature)
mean_pvt_delay_1_at_home
/opt/python3/lib/python3.5/site-packages/ipykernel/__main__.py:15: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
In [ ]: